# -*- coding: utf-8 -*-
import scrapy

from gsww.items import GswwItem


class GswwSpiderSpider(scrapy.Spider):
    """Spider that scrapes poem listings from gushiwen.org.

    Crawling starts at ``start_urls`` and continues through the link
    identified by ``id='amore'`` for as long as a page provides one.
    Each poem block found on a page is emitted as a ``GswwItem`` with the
    fields ``title``, ``dynasty``, ``author`` and ``new_content``.
    """

    name = 'gsww_spider'
    allowed_domains = ['gushiwen.org']
    start_urls = ['https://www.gushiwen.org/default_1.aspx']

    def myprint(self, value):
        """Print ``value`` between two separator lines (debugging aid)."""
        print('=' * 30)
        print(value)
        print('=' * 30)

    def parse(self, response):
        """Extract poem items from a listing page and follow pagination.

        Args:
            response: The downloaded listing page.

        Yields:
            GswwItem: One item per ``div.sons`` block that has a title or
                content. Fields that cannot be found are empty strings.
            scrapy.Request: A request for the next page, when the page
                contains a link with ``id='amore'``.
        """
        poem_blocks = response.xpath("//div[@class='left']//div[@class='sons']")
        for block in poem_blocks:
            # Every field is computed afresh for each block so that a
            # missing value can never be inherited from the previous poem
            # (or be undefined on the first block).
            title = ''.join(block.xpath(".//p//b/text()").getall())

            source_links = block.xpath(".//p[@class='source']//a/text()").getall()
            # The source line is read as [dynasty, author]; pad with empty
            # strings so a short or missing source line does not raise.
            dynasty, author = (source_links + ['', ''])[:2]

            content_parts = block.xpath(".//div[@class='contson']//text()").getall()
            new_content = ''.join(content_parts).strip()

            if not title and not new_content:
                # Block carries neither a title nor a body, so there is
                # nothing worth emitting for it.
                continue

            yield GswwItem(
                author=author,
                title=title,
                dynasty=dynasty,
                new_content=new_content,
            )

        next_href = response.xpath("//a[@id='amore']/@href").get()
        if next_href:
            # The href may be relative; resolve it against the current page.
            next_url = response.urljoin(next_href)
            yield scrapy.Request(next_url, callback=self.parse)
